Spatial Data Table

D Cooley

2017-06-27

library(microbenchmark)
library(spatialdatatable)
library(googleway)
## 
## Attaching package: 'googleway'
## The following objects are masked from 'package:spatialdatatable':
## 
##     decode_pl, encode_pl
library(data.table)
library(geosphere) ## for compariing results
## Loading required package: sp

Haversine Distance

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt1 <- copy(dt)
dt2 <- copy(dt)

microbenchmark(
    sdt = { dt1[, dtDistance := dtHaversine(lat1, lon1, lat2, lon2)]  },
    geo = { dt2[, geoDistance := distHaversine(matrix(c(lon1, lat1), ncol = 2),
                                   matrix(c(lon2, lat2), ncol = 2))]  }
)
## Unit: milliseconds
##  expr     min       lq     mean   median       uq       max neval
##   sdt 1.42753 1.506079 1.797605 1.609220 1.705943  6.717264   100
##   geo 3.35353 4.067097 7.324800 5.947944 6.415174 76.608145   100
dt1
##        lat1 lon1 lat2 lon2 dtDistance
##     1:   21 -172   55  -42   10341893
##     2:   89   55   66   76    2568034
##     3:  -76  -28   47   64   15103268
##     4:   56 -118   12   56   12447184
##     5:   86 -137  -36   16   14420491
##    ---                               
##  9996:  -32 -158   83  -24   14105276
##  9997:  -52   78  -86  -84    4655348
##  9998:    1  102   47  -78   14694173
##  9999:  -60  -55  -26    4    5825474
## 10000:   10 -158  -72  126   10603344

Comparison of distance calculations

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt[, idx := .I]
dt[, distEuclid := dtEuclidean(lat1, lon1, lat2, lon2)]
dt[, distHaversine := dtHaversine(lat1, lon1, lat2, lon2)]
dt[, distCosine := dtCosine(lat1, lon1, lat2, lon2)]

Bearing

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt1 <- copy(dt)
dt2 <- copy(dt)

microbenchmark(
    sdt = { dt1[, dtBearing := dtBearing(lat1, lon1, lat2, lon2)]  },
    geo = { dt2[, geoBearing := bearing(matrix(c(lon1, lat1), ncol = 2),
                                   matrix(c(lon2, lat2), ncol = 2))]  }
)
## Unit: milliseconds
##  expr      min        lq      mean    median        uq       max neval
##   sdt  1.35496  1.448837  1.607494  1.542266  1.663333  4.023592   100
##   geo 12.78782 13.926918 16.771924 15.345428 16.229512 89.279402   100
dt1
##        lat1 lon1 lat2 lon2     dtBearing
##     1:   21 -172   55  -42  2.610066e+01
##     2:   89   55   66   76  1.581615e+02
##     3:  -76  -28   47   64  7.728122e+01
##     4:   56 -118   12   56  6.322859e+00
##     5:   86 -137  -36   16  2.844239e+01
##    ---                                  
##  9996:  -32 -158   83  -24  6.278030e+00
##  9997:  -52   78  -86  -84 -1.781474e+02
##  9998:    1  102   47  -78 -9.410992e-08
##  9999:  -60  -55  -26    4  7.672887e+01
## 10000:   10 -158  -72  126 -1.624762e+02

Midpoint

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt[, c("midLat", "midLon") := dtMidpoint(lat1, lon1, lat2, lon2)] 
dt
##        lat1 lon1 lat2 lon2    midLat     midLon
##     1:   21 -172   55  -42  58.71023 -134.12349
##     2:   89   55   66   76  77.53177   75.15296
##     3:  -76  -28   47   64 -18.46358   44.25426
##     4:   56 -118   12   56  67.66393   48.11442
##     5:   86 -137  -36   16  28.73039   13.57198
##    ---                                         
##  9996:  -32 -158   83  -24  31.05043 -151.44902
##  9997:  -52   78  -86  -84 -72.88747   75.75280
##  9998:    1  102   47  -78  67.00000  102.00000
##  9999:  -60  -55  -26    4 -46.60747  -16.33706
## 10000:   10 -158  -72  126 -35.22140 -173.80055

Simplifying Polylines

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)
object.size(dt_melbourne)
## 962080 bytes
## first simplification with a 10 metre tolerance
dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 10, type = "complex"),
                         by = .(polygonId, pathId)]
object.size(dt_melbourne)
## 434680 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)

dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 100, type = "complex"),
                         by = .(polygonId, pathId)]

object.size(dt_melbourne)
## 300504 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)

dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 1000, type = "complex"),
                         by = .(polygonId, pathId)]

object.size(dt_melbourne)
## 250240 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

Nearest Points

library(spatialdatatable)
library(googleway)
library(data.table)

dt_stops <- as.data.table(tram_stops)
dt_route <- as.data.table(tram_route)

dt_nearest <- dtNearestPoints(dt1 = copy(dt_route),
                                                            dt2 = copy(dt_stops),
                                                            dt1Coords = c("shape_pt_lat", "shape_pt_lon"),
                                                            dt2Coords = c("stop_lat","stop_lon"))


## create a polyline between the joined pairs of coordinates
# 
# dt_nearest[, polyline := gepaf::encodePolyline(data.frame(c(dt_nearest[, shape_pt_lat.x], dt_nearest[, stop_lat.y]),
#                                                                                                                   c(dt_nearest[, shape_pt_lon.x], dt_nearest[, stop_lon.y])))]


pl <- sapply(1:nrow(dt_nearest), function(x){
    lats <- dt_nearest[x, c(shape_pt_lat.x, stop_lat.y)]
    lons <- dt_nearest[x, c(shape_pt_lon.x, stop_lon.y)]
    polyline = encode_pl(lat = lats,lon = lons)
})


dt_nearest[, polyline := pl ]


# mapKey <- symbolix.utils::mapKey()

# google_map(key = mapKey) %>%
#   #add_circles(data = dt_route, lat = "shape_pt_lat", lon = "shape_pt_lon", fill_colour = "#FF00FF", stroke_weight = 0) %>%
#   add_markers(data = dt_stops, lat = "stop_lat", lon = "stop_lon") %>%
#   add_polylines(data = dt_route, lat = "shape_pt_lat", lon = "shape_pt_lon") %>%
#   add_circles(data = dt_nearest, lat = "shape_pt_lat.x", lon = "shape_pt_lon.x", stroke_weight = 0, radius = 20) %>%
#   add_polylines(data = dt_nearest, polyline = "polyline", stroke_colour = "#000000")
#   #add_circles(data = dt_stops, lat = "stop_lat", lon = "stop_lon", fill_colour = "#00FF00", stroke_weight = 0)